*******************************************************************************
* MANUSCRIPT: Authorized Generic Entry prior to Patent Expiry: Reassessing
*             Incentives for Independent Generic Entry
* AUTHOR:     Silvia Appelt, University of Munich, silvia.appelt@lrz.uni-muenchen.de
*******************************************************************************
*                  *** NPI DATA PREPARATION (STEP 1 & 2) ***
* Date last edit:  11 January 2015
*******************************************************************************
* This part of the do-file
*   (I)  restricts npi_hist_clean.dta to human medicines (zklass_id==1) and
*        saves npi_hist_human.dta;
*   (II) builds analysis variables, renames the raw variables to English,
*        aggregates monthly sales data to years, collapses pzn_id_mod
*        duplicates, saves npi_human1.dta and harmonises producer names.

version 13.0
set more off
cap log close
clear

*** (I) Limit data to medicines for human use
use npi_hist_clean.dta, clear
drop pzn_index* aut_idem* pzn_no prod_change out_of_stock0* gen_id_db0* subs_db0* ///
     produkt_db0* produkt_pi_db0* producer_db0* parent_db0* producer_hist_db0*

* Human medicines
drop if zklass_id != 1

* Regenerate identifier of changes in producers selling products:
* prod_change is 1 when a pzn_id_mod occurs in more than one row
by pzn_id_mod, sort: egen pzn_mod_index = count(pzn_id_mod)
generate prod_change = 0
replace prod_change = 1 if pzn_mod_index != 1
count if pzn_id != pzn_id_mod
count if pzn_diff == 1

* Running number of retail forms
sort pzn_id_mod
egen pzn_no = group(pzn_id_mod)
sum pzn_no
sort pzn_id_mod
save npi_hist_human.dta, replace

******* (II) NPI Data - Human medicines
* (1) Generate data and variables for econometric analysis

*** Identify monosubstances: no "+" in the substance string and no match of
*** the pattern below
* NOTE(review): the regular expression matches zero or more "(" followed by
* "W)"; if a literal "(*W)" was intended the asterisk needs escaping - confirm.
gen plus = strpos(substanzen, "+")
gen w_par = regexm(substanzen,"\(*W\)")
gen mono_substance = 0
replace mono_substance = 1 if plus == 0 & w_par == 0
tab mono_substance
drop plus w_par

*** OTC vs. Rx Drugs
* NOTE(review): the umlaut in the first literal was garbled in the source text
* and has been restored as "ä". The do-file must be saved in the same encoding
* as the string data, otherwise this comparison never matches - confirm.
gen rx = 0
recode rx (0=1) if abgabe == "Betäubungsmittel" | abgabe == "rezeptpflichtig"
tab abgabe
tab rx

*** Date of launch retail form (pzn)
* -309777 is equivalent to 11.11.1111 (date not available)
replace einfuehrung = . if einfuehrung == -309777
generate year_form_launch = year(einfuehrung)
generate month_form_launch = month(einfuehrung)

*** Date of product launch [= first launch medical product]
* prod_launch is split at the underscore into a year part and a month part
gen underscore = strpos(prod_launch, "_")
gen year_launch_s = substr(prod_launch, 1, underscore-1)
gen month_launch_s = substr(prod_launch, underscore+1, .)
destring month_launch_s, gen (month_launch)
destring year_launch_s, gen (year_launch)
drop year_launch_s month_launch_s underscore
generate date_launch = mdy(month_launch, 1, year_launch)
* Year 1111 marks an unavailable date; year_launch is reset last because the
* two preceding statements condition on it
replace date_launch = . if year_launch == 1111
replace month_launch = . if year_launch == 1111
replace year_launch = . if year_launch == 1111

* Replace date retail form launch by date product launch if missing -
* monosubstances only (subject of analysis)
* NOTE(review): month_form_launch is not back-filled here, so it stays missing
* where year_form_launch is filled in - confirm this is intended.
gen help = einfuehrung
replace einfuehrung = date_launch if help == . & mono_substance == 1
replace year_form_launch = year_launch if help == . & mono_substance == 1
drop help

**** Renaming and formatting of variables
rename hersteller       producer
rename hersteller_hist  producer_hist
rename konzern          parent
rename substanzen       substances
rename produkt          product
rename produkt_pi       product_pi
rename handelsformen    ret_form
rename einfuehrung      date_form_launch
rename rezept           ret_form_prescription
rename zzb              copay
rename abgabe           dispensary
rename festbetrag       ref_price
rename zusatzklasse     drug_class
rename staerke          strength
rename packungen        packages
rename generika         drug_type
format date_launch %td
format date_form_launch %td

*** Renaming of price, turnover and revenue variables
* annual (1999-2003)
quietly forvalues y = 1999/2003 {
    rename absatz_gh`y'          wholesale_vol`y'
    rename abverkauf_eh`y'       wh_turnover`y'
    rename naturalrabatt_eh`y'   wh_rebate_vol`y'
    rename retouren_eh`y'        wh_retour_vol`y'
    rename umsatz_gh`y'          wholesale_rev`y'
    rename abverkauf_ums`y'      wh_revenue`y'
    rename naturalrabatt_ums`y'  wh_rebate_rev`y'
    rename retouren_ums`y'       wh_retour_rev`y'
    rename direkt_eh`y'          direct_vol`y'
    rename direkt_ums`y'         direct_rev`y'
}
* Note: wholesale_vol = wh_turnover + wh_rebate_vol - wh_retour_vol

* monthly (2002-2007)
quietly forvalues y = 2002/2007 {
    forvalues x = 1/12 {
        rename absatz_gh_`x'_`y'         wholesale_vol`x'_`y'
        rename abverkauf_eh_`x'_`y'      wh_turnover`x'_`y'
        rename naturalrabatt_eh_`x'_`y'  wh_rebate_vol`x'_`y'
        rename retouren_eh_`x'_`y'       wh_retour_vol`x'_`y'
        rename umsatz_gh_`x'_`y'         wholesale_rev`x'_`y'
        rename hap_`x'_`y'               pprice`x'_`y'
        rename avp_`x'_`y'               retprice`x'_`y'
        rename fbetrag_`x'_`y'           refprice`x'_`y'
        rename direkt_eh_`x'_`y'         direct_vol`x'_`y'
        rename direkt_ums_`x'_`y'        direct_rev`x'_`y'
    }
}

* Set prices of 0 to missing (and likewise turnover/revenue if equal to zero
* and the producer price is missing), then compute monthly wholesale revenue
* data as volume times producer price. The prices are cleaned first because
* the volume/revenue replacements condition on the cleaned pprice.
quietly forvalues y = 2002/2007 {
    forvalues x = 1/12 {
        foreach price in pprice refprice retprice {
            replace `price'`x'_`y' = . if `price'`x'_`y' == 0
        }
        foreach stem in wholesale_vol wh_turnover wh_rebate_vol wh_retour_vol wholesale_rev direct_vol direct_rev {
            replace `stem'`x'_`y' = . if `stem'`x'_`y' == 0 & pprice`x'_`y' == .
        }
        generate wh_revenue`x'_`y'    = wh_turnover`x'_`y'   * pprice`x'_`y'
        generate wh_rebate_rev`x'_`y' = wh_rebate_vol`x'_`y' * pprice`x'_`y'
        generate wh_retour_rev`x'_`y' = wh_retour_vol`x'_`y' * pprice`x'_`y'
    }
}

* Aggregate monthly turnover and revenue data at year level [2004-2007]
* The wildcard stem must name the full monthly variable: the shorter stems
* wh_rebate and wh_retour would also match the monthly volume variables
* (wh_rebate_vol, wh_retour_vol) and add unit volumes to the revenue totals.
quietly forvalues y = 2004/2007 {
    foreach stem in wholesale_vol wh_turnover wh_rebate_vol wh_retour_vol wholesale_rev wh_revenue wh_rebate_rev wh_retour_rev direct_vol direct_rev {
        egen `stem'`y' = rowtotal(`stem'*_`y')
    }
}

* Aggregate turnover and revenue data (based on producer and retail prices):
* wholesale plus direct sales
* a) Turnover: annual data for 1999-2001, monthly data summed for 2002-2007
quietly forvalues y = 1999/2001 {
    gen total_vol`y' = wholesale_vol`y' + direct_vol`y'
}
quietly forvalues y = 2002/2007 {
    forvalues x = 1/12 {
        gen total_vol`x'_`y' = wholesale_vol`x'_`y' + direct_vol`x'_`y'
    }
    egen total_vol`y' = rowtotal(total_vol*_`y')
}

* b) Revenue at producer prices
quietly forvalues y = 1999/2001 {
    gen total_rev`y' = wholesale_rev`y' + direct_rev`y'
}
quietly forvalues y = 2002/2007 {
    forvalues x = 1/12 {
        gen total_rev`x'_`y' = wholesale_rev`x'_`y' + direct_rev`x'_`y'
    }
    egen total_rev`y' = rowtotal(total_rev*_`y')
}

* c) Revenue at retail prices
quietly forvalues y = 2002/2007 {
    forvalues x = 1/12 {
        gen total_ret_rev`x'_`y' = total_vol`x'_`y' * retprice`x'_`y'
    }
    egen total_ret_rev`y' = rowtotal(total_ret_rev*_`y')
}

****** Delete pzn_id_mod duplicates arising from the integration of historical
****** producer information (not relevant for analysis)
* Every listed variable is replaced by its sum over all rows sharing the same
* pzn_id_mod before the duplicate rows are dropped.
* NOTE(review): the monthly list includes the price variables, so prices are
* summed across duplicate rows as well, and egen total() turns all-missing
* groups into 0 - confirm this is intended.

* Monthly
local namelist1 pprice retprice refprice wholesale_vol wh_turnover wh_rebate_vol wh_retour_vol wholesale_rev wh_revenue wh_rebate_rev wh_retour_rev direct_vol direct_rev total_vol total_rev total_ret_rev
foreach name of local namelist1 {
    quietly forvalues y = 2002/2007 {
        forvalues x = 1/12 {
            by pzn_id_mod, sort: egen h_`name'`x'_`y' = total(`name'`x'_`y')
            replace `name'`x'_`y' = h_`name'`x'_`y'
            drop h_`name'`x'_`y'
        }
    }
}

* Annual
local namelist2 wholesale_vol wh_turnover wh_rebate_vol wh_retour_vol wholesale_rev wh_revenue wh_rebate_rev wh_retour_rev direct_vol direct_rev total_vol total_rev
foreach name of local namelist2 {
    quietly forvalues y = 1999/2007 {
        by pzn_id_mod, sort: egen h_`name'`y' = total(`name'`y')
        replace `name'`y' = h_`name'`y'
        drop h_`name'`y'
    }
}

* Annual retail revenue exists for 2002-2007 only
local namelist3 total_ret_rev
foreach name of local namelist3 {
    quietly forvalues y = 2002/2007 {
        by pzn_id_mod, sort: egen h_`name'`y' = total(`name'`y')
        replace `name'`y' = h_`name'`y'
        drop h_`name'`y'
    }
}

* Keep one row per pzn_id_mod
* NOTE(review): the order of rows within a pzn_id_mod group is not fixed by
* the sort above, so which duplicate is kept (and hence its values of the
* variables that are not summed) may differ between runs - confirm this is
* harmless for the analysis.
sum pzn_no
drop producer_hist hst_hist_id
duplicates drop pzn_id_mod, force
sum pzn_no

* Running numbers for the atc and nfc classification levels 1 to 3
foreach class in atc nfc {
    forvalues x = 1/3 {
        sort `class'`x'_id
        egen `class'`x'_no = group(`class'`x'_id)
        sum `class'`x'_no, d
    }
}

count
save npi_human1.dta, replace

** Harmonize names of generic manufacturers
* producer_name keeps the original spelling
* NOTE(review): the umlaut in the last replacement was garbled in the source
* text and has been restored as "ö"; see the encoding remark at the Rx block.
gen producer_name = producer
replace producer = "Stada AG" if producer == "Stada"
replace producer = "Teva" if producer == "Teva Generics"
replace producer = "Wolff Bielefeld" if producer == "Wolff"
replace producer = "Wörwag" if producer == "Woerwag"

* Reimport indicator
* retail form/PZN
gen re_index = 0
replace re_index = 1 if reimport == ">"
* firm: flagged when the producer name contains ">"
gen firm_re_index = regexm(producer,">")
tab firm_re_index
by producer, sort: egen help = mean(firm_re_index)
tab help
drop help
tab firm_re_index re_index
* Note: some firms are classified as "no reimport" that actually have reimport
* products in their portfolio

* Drug type classification: numeric code 0-3 in the order of the labels below;
* any other drug_type value leaves drug_code missing
generate drug_code = .
local code = 0
foreach label in "keine Generikasituation" "Generika" "Originalprodukt" "Patent" {
    replace drug_code = `code' if drug_type == "`label'"
    local ++code
}
tab drug_code
tab gen_id

* Generate Indices
** Producer
sort hst_id
egen all_producer_no = group(producer)
sum all_producer_no, d

* Exclude pharmacies among group of producers: index1-index5 hold the position
* of each pharmacy marker in the producer name (0 = marker absent)
local i = 0
local no_pharmacy "1"
foreach marker in "Ap." "-A." "Apo." "Apoth." "Apotheke" {
    local ++i
    gen index`i' = strpos(producer,"`marker'")
    tab producer if index`i' != 0
    local no_pharmacy "`no_pharmacy' & index`i'==0"
}

* Producer number only for firms without any pharmacy marker
sort hst_id
egen producer_no = group(producer) if `no_pharmacy'
sum producer_no, d

** Parent firm
sort konz_id
egen all_parent_no = group(parent)
sum all_parent_no, d
sort konz_id
egen parent_no = group(parent) if `no_pharmacy'
sum parent_no, d
drop index*

* Drop pharmacies (rows without a producer number)
count if producer_no == .
count if parent_no == .
count if parent_no == . & producer_no == .
drop if producer_no == .
* Rebuild the running numbers after the pharmacy rows have been removed
drop all_producer_no all_parent_no pzn_no pzn_mod_index

sort pzn_id_mod
egen pzn_no = group(pzn_id_mod)
sum pzn_no, d

sort prd_id
egen product_no = group(product)
sum product_no, d

sort sub_id
egen sub_no = group(substances)
sum sub_no, d

* Market attractiveness: revenues at producer and retail prices, summed over
* all rows of the same substance (sub_no)
quietly forvalues y = 1999/2007 {
    bysort sub_no: egen marketsize`y' = total(total_rev`y')
}
sum marketsize*, d

quietly forvalues y = 2002/2007 {
    bysort sub_no: egen retmarketsize`y' = total(total_ret_rev`y')
}
sum retmarketsize*, d

save npi_human1.dta, replace

*** Identify potential generic entrants (active generic manufacturers)
* (Generic) retail form portfolio

* pzn_portfolio: number of distinct pzn_id_mod per producer
generate pzn_portfolio = .
sort producer_no pzn_id_mod
egen help_tag = tag(producer_no pzn_id_mod)
bysort producer_no: egen help_n = total(help_tag) if help_tag == 1
bysort producer_no: egen help_max = max(help_n)
bysort producer_no: replace pzn_portfolio = help_max
drop help*
sum pzn_portfolio, d

* generic_pznportfolio: the same count restricted to rows with drug_code==1;
* set to 0 for producers without any such row
generate generic_pznportfolio = .
sort producer_no pzn_id_mod
egen help_tag = tag(producer_no pzn_id_mod) if drug_code == 1
bysort producer_no: egen help_n = total(help_tag) if help_tag == 1
bysort producer_no: egen help_max = max(help_n)
bysort producer_no: replace generic_pznportfolio = help_max
bysort producer_no: replace generic_pznportfolio = 0 if help_max == . & producer_no != .
drop help*
sum generic_pznportfolio, d

* Share of generic retail forms in the producer's portfolio
bysort producer_no: gen generic_pznshare = generic_pznportfolio/pzn_portfolio if producer_no != .
sum generic_pznshare, d

* Number of non-reimport producers with a generic share of at least one half
egen help = tag(producer_no) if firm_re_index == 0
count if help == 1 & generic_pznshare >= 0.5
drop help

* Active generic drug manufacturers (no reimport): generic share of at least
* one half and at least 50 generic retail forms
egen act_generics_no = group(producer_no) if generic_pznshare >= 0.5 & firm_re_index == 0 & generic_pznportfolio >= 50
sum act_generics_no
** 101 firms

compress
save npi_human1.dta, replace

******* (II) MERGE NPI Data (Human medicines) and Patent/SPC data
* Every NPI row is kept (unmatched(master)); _merge is 3 for substances found
* in the patent/SPC file. The path uses a forward slash, which Stata accepts
* on every platform, instead of the Windows-only backslash.
sort substances
sum pzn_no
joinby substances using "PATENT_SPC/patent_spc_final.dta", unmatched(master) _merge(_merge)
tab _merge
sum pzn_no
drop patent_count spc_count

* Consistency of expiry dates
* NOTE(review): Stata treats missing as larger than any number and . == . as
* true, so the comparisons below also count rows in which one or both dates
* are missing (e.g. unmatched rows) - keep in mind when reading the counts.
* NOTE(review): originalexpiry_num in the second count is not referenced
* anywhere else in this file - confirm the variable name.
count if orignialexpiry1_num > originalexpiry2_num & orignialexpiry1 != ""
count if originalexpiry2_num > originalexpiry_num
count if expirydatecurrent_num == expirydateexpected_num
count if expirydatecurrent_num < expirydateexpected_num
count if expirydatecurrent_num > expirydateexpected_num
*list orignialexpiry1 originalexpiry2 expirydatecurrent expirydateexpected if expirydatecurrent_num>expirydateexpected_num
* Most often one day difference across given expiry dates (in one case 7
* months difference; year of expiry 2015/2016)

count if expirydatecurrent_num == expirydateexpected_num & expirydateexpected_year <= 2007
count if expirydatecurrent_num < expirydateexpected_num & expirydateexpected_year <= 2007
count if expirydatecurrent_num > expirydateexpected_num & expirydateexpected_year <= 2007
list orignialexpiry1 originalexpiry2 expirydatecurrent expirydateexpected if expirydatecurrent_num > expirydateexpected_num & expirydateexpected_year <= 2007
* One day difference, year of expiry is the same

* expiry_cut: latest expected expiry year within a substance, matched rows only
generate expiry_cut = .
by sub_no, sort: egen help = max(expirydateexpected_year)
replace expiry_cut = help if expiry_cut == . & _merge == 3
drop help
tab expiry_cut

**** (III) Drug Indices
* Drug matches
sort sub_no
egen sub_no_joint = group(sub_no) if _merge == 3
sum sub_no_joint, d
* 329

* Drug matches with Patent/SPC expiry until 2007
sort sub_no
egen sub_no_match = group(sub_no) if _merge == 3 & expiry_cut <= 2007
sum sub_no_match, d
* 65

rename _merge _joinby_pat
compress
save npi_human2.dta, replace

*** end of do file